library(tidyverse)
āā Attaching core tidyverse packages āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā tidyverse 2.0.0 āā
ā dplyr 1.1.4 ā readr 2.1.5
ā forcats 1.0.0 ā stringr 1.5.1
ā ggplot2 3.5.1 ā tibble 3.2.1
ā lubridate 1.9.4 ā tidyr 1.3.1
ā purrr 1.0.4 āā Conflicts āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā tidyverse_conflicts() āā
ā dplyr::filter() masks stats::filter()
ā dplyr::lag() masks stats::lag()
ā¹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(WGCNA)
Loading required package: dynamicTreeCut
Loading required package: fastcluster
Attaching package: āfastclusterā
The following object is masked from āpackage:statsā:
hclust
Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Registered S3 method overwritten by 'rmarkdown':
method from
print.paged_df
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Attaching package: āWGCNAā
The following object is masked from āpackage:statsā:
cor
library(gplots)
Attaching package: āgplotsā
The following object is masked from āpackage:statsā:
lowess
options(stringsAsFactors = FALSE)
load sample info
sample.description <- read.csv("../input/sample.description.csv")
load reads
lcpm <- read.csv("../output/log2cpm.csv.gz", row.names = 1, check.names = FALSE)
head(lcpm)
dim(lcpm)
[1] 26704 48
Filter for sporophyte samples:
sample.description <- sample.description %>% filter(str_detect(tissue, "S5|WYS"))
lcpm <- lcpm[,sample.description$sample]
Filter for genes with the highest coefficient of variation
CV <- apply(lcpm, 1, \(x) abs(sd(x)/mean(x)))
hist(log10(CV))
names(CV) <- rownames(lcpm)
CV[str_detect(names(CV), "18G076300|33G031700")]
Ceric.18G076300.v2.1 Ceric.33G031700.v2.1
0.08763174 0.18394629
quantile(CV, 0.23)
23%
0.08608846
lcpm.filter <- lcpm[CV > quantile(CV, 0.23),]
dim(lcpm.filter)
[1] 20562 30
WGCNA wants genes in columns
lcpm.filter.t <- t(lcpm.filter)
Soft thresholding
powers <- c(c(1:10), seq(from = 12, to=20, by=2))
sft <- pickSoftThreshold(lcpm.filter.t, powerVector = powers, verbose = 5,networkType = "signed hybrid", blockSize = 15000)
pickSoftThreshold: calculating connectivity for given powers...
..working on genes 1 through 15000 of 20562
Warning: executing %dopar% sequentially: no parallel backend registered
..working on genes 15001 through 20562 of 20562
sizeGrWindow(9, 5)
par(mfrow = c(1,2))
cex1 <- 0.9
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red")
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
choose 8
softPower <- 8
adjacency <- adjacency(lcpm.filter.t, power = softPower, type = "signed hybrid")
# Turn adjacency into topological overlap
TOM <- TOMsimilarity(adjacency, TOMType = "signed");
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
dissTOM <- 1-TOM
# Call the hierarchical clustering function
geneTree <- hclust(as.dist(dissTOM), method = "average")
# Plot the resulting clustering tree (dendrogram)
sizeGrWindow(12,9)
plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity",
labels = FALSE, hang = 0.04)
define modules
# We like large modules, so we set the minimum module size relatively high:
minModuleSize <- 30;
# Module identification using dynamic tree cut:
dynamicMods <- cutreeDynamic(dendro = geneTree, distM = dissTOM,
deepSplit <- 2, pamRespectsDendro = FALSE,
minClusterSize = minModuleSize);
..done.
table(dynamicMods)
dynamicMods
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
2597 1248 1139 890 791 778 659 653 508 498 482 459 436 428 421 408 404 380 376 367 350 336 314 312 308 306 283 273
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
251 246 243 236 226 198 198 190 184 174 172 170 145 143 142 121 118 113 110 107 98 94 93 87 87 77 74 61
# Convert numeric labels into colors
dynamicColors = labels2colors(dynamicMods)
table(dynamicColors)
dynamicColors
bisque4 black blue brown brown4 cyan darkgreen darkgrey
110 659 1248 1139 113 428 336 312
darkmagenta darkolivegreen darkorange darkorange2 darkred darkslateblue darkturquoise floralwhite
198 226 306 118 350 107 314 121
green greenyellow grey60 ivory lightcyan lightcyan1 lightgreen lightpink4
791 482 404 142 408 143 380 61
lightsteelblue1 lightyellow magenta maroon mediumpurple3 midnightblue navajowhite2 orange
145 376 508 74 170 421 77 308
orangered4 paleturquoise palevioletred3 pink plum1 plum2 purple red
172 243 87 653 174 98 498 778
royalblue saddlebrown salmon salmon4 sienna3 skyblue skyblue3 steelblue
367 251 436 87 198 273 184 246
tan thistle1 thistle2 turquoise violet white yellow yellowgreen
459 93 94 2597 236 283 890 190
# Plot the dendrogram and colors underneath
sizeGrWindow(8,6)
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")
merge similar modules
# Calculate eigengenes
MEList <- moduleEigengenes(lcpm.filter.t, colors = dynamicColors)
MEs <- MEList$eigengenes
# Calculate dissimilarity of module eigengenes
MEDiss <- 1-cor(MEs);
# Cluster module eigengenes
METree <- hclust(as.dist(MEDiss), method = "average");
# Plot the result
sizeGrWindow(7, 6)
plot(METree, main = "Clustering of module eigengenes",
xlab = "", sub = "")
merge with correlation > 0.75
MEDissThres = 0.25
# Plot the cut line into the dendrogram
plot(METree, main = "Clustering of module eigengenes",
xlab = "", sub = "")
abline(h=MEDissThres, col = "red")
# Call an automatic merging function
merge = mergeCloseModules(lcpm.filter.t, dynamicColors, cutHeight = MEDissThres, verbose = 3)
mergeCloseModules: Merging modules whose distance is less than 0.25
multiSetMEs: Calculating module MEs.
Working on set 1 ...
moduleEigengenes: Calculating 56 module eigengenes in given set.
multiSetMEs: Calculating module MEs.
Working on set 1 ...
moduleEigengenes: Calculating 31 module eigengenes in given set.
multiSetMEs: Calculating module MEs.
Working on set 1 ...
moduleEigengenes: Calculating 26 module eigengenes in given set.
Calculating new MEs...
multiSetMEs: Calculating module MEs.
Working on set 1 ...
moduleEigengenes: Calculating 26 module eigengenes in given set.
# The merged module colors
mergedColors = merge$colors
# Eigengenes of the new merged modules:
mergedMEs = merge$newMEs
compare pre and post merge
sizeGrWindow(12, 9)
#pdf(file = "Plots/geneDendro-3.pdf", wi = 9, he = 6)
plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
#dev.off()
# Rename to moduleColors
moduleColors = mergedColors
# Construct numerical labels corresponding to the colors
colorOrder = c("grey", standardColors(50));
moduleLabels = match(moduleColors, colorOrder)-1;
MEs = mergedMEs
table(merge$colors)
black blue brown cyan darkgreen darkgrey darkorange2 darkred
659 2521 3736 428 423 775 118 730
darkslateblue green greenyellow grey60 ivory lightcyan lightcyan1 lightsteelblue1
107 1250 482 825 1032 1439 1381 145
magenta maroon mediumpurple3 orangered4 plum2 saddlebrown salmon4 skyblue
912 261 170 2120 98 251 87 273
steelblue thistle1
246 93
length(table(merge$colors))
[1] 26
median(table(merge$colors))
[1] 455
Which module is LFY in?
Interesting: they are in different modules(!). But both are quite largeā¦
module.assignment %>% group_by(module) %>% summarize(n_genes = n()) %>% arrange(n_genes)
Plot eigengenes
Make sure sample info sheet is in the correct order.
rownames(lcpm.filter.t) %>% str_replace_all("\\.", "-") == sample.description$sample
sample.eigen <- cbind(sample.description, MEs)
sample.eigen
sample.eigen.l <- sample.eigen %>%
mutate(gt_tissue=str_c(base_gt, "-", tissue)) %>%
pivot_longer(starts_with("ME"), names_to = "ME")
sample.eigen.means <- sample.eigen.l %>%
group_by(gt_tissue, ME) %>%
summarise(value = mean(value))
sample.eigen.l %>%
ggplot(aes(x=gt_tissue, y = value)) +
geom_point(aes(color = tissue)) +
geom_line(data=sample.eigen.means, group = 1, lwd=.3) +
facet_wrap(~ME, ncol=5) +
theme(axis.text.x = element_text(angle = 90, hjust=1, vjust=.5)) +
scale_color_brewer(type="qual", palette = "Set3")
A heat map:
MEs.m <- as.matrix(MEs)
heatmap.2(MEs.m, trace="none", cexRow= 0.6, col="bluered")
save(module.assignment, MEs, lcpm.filter, CrLFY1, CrLFY2, file="../output/WGCNA_sporophyteSamples.Rdata")